/* 
    Purpose: This file merges 1940-1990 income scores
             by level of variation, separately for 
             fathers and mothers. For fathers, 2000, 2010,
             and 2019 scores are also merged at the
             occupation x race x South level.

    Notes:  (1) Predicted 1950 income is made with personal income.
                Not possible to calculate family income in 1950.
                See 2a and 2b files for notes.
            (2) Income scores for adult child survey respondents
                (merged in 2_Clean_PooledData) will also be saved 
                at some levels of variation.

    Creates: All output files have the prefix 
             "IncomeScores_Coarsened_".
*/
* Drop any data currently in memory before the first file is loaded.
clear
* Disable the more-pause so output scrolls without waiting for a keypress.
set more off
* Every file name below is relative to this directory. The global macro
* Mydirectory1 is not defined in this file and is expected to be set
* before the file is run.
cd "$Mydirectory1/1_DataSources/CensusData/output"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*    

***************************************
*** FATHERS
***************************************

*--------------------*
* 1. OCCUPATION-ONLY
*--------------------*
* Builds one row per father occupation code (fatheroccej): starts from the
* 1950 file, merges in the 1940 file and then the 1960-1990 files, and saves
* the result. A reduced copy keyed on occRej is then saved for adult child
* survey respondents.

* 1950: starting (master) file
    use incomescores_fathers1950_byocc.dta, clear
    keep fatheroccej avg_occscore_1950_byocc avg_inctot_1950_byocc flag*

* 1940: merged without any requirement on the match result
    merge 1:1 fatheroccej using avgincomes_fathers1940_byocc_CWfix.dta
    drop _merge
    keep fatheroccej avg_occscore_1950_byocc avg_inctot_1950_byocc avg_HHinc_* avg_incwage* flag*

* 1960-1990: one using file per census year
    forvalues yr = 1960(10)1990 {
        merge 1:1 fatheroccej using incomescores_fathers`yr'_byocc.dta
        * Every row must be matched in both files. Because this check already
        * rules out master-only rows, no separate check on _merge==1 rows is
        * needed (it could never fail).
        assert _merge==3
        drop _merge
        * Trim to the key, the score variables, and the flags after each merge
        keep fatheroccej avg_occscore_1950_byocc avg_inctot_1950_byocc  avg_HHinc* avg_inctot* avg_incwage* flag*
    }

    save IncomeScores_Coarsened_all.dta, replace

    ***VERSION FOR ADULT CHILD SURVEY RESPONDENTS***
    * Same rows re-keyed on occRej, without occupation code 99, keeping only
    * the 1950 occupation score.
    rename fatheroccej occRej
    drop if occRej==99

    keep occRej avg_occscore_1950_byocc

    * Add the _R suffix and label the score as a respondent score
    foreach score of varlist avg_occscore_1950_byocc {
        rename `score' `score'_R
        label variable `score'_R "Respondent income score using `score'"
    }

    save IncomeScores_Coarsened_all_R.dta, replace
    
    
*----------------------------------*
*** 2. OCCUPATION x RACE x SOUTH  
*----------------------------------*
* One row per fatheroccej x race x south_merge cell. Starts from the 1940
* averages, merges in one score file per year, saves the combined file, and
* then saves a trimmed copy for adult child survey respondents.

* 1940: starting (master) file
    use avgincomes_fathers1940_byrace_bysouth_CWfix.dta, clear
    keep fatheroccej race south_merge        ///
         avg_incwage_byr_bys_CWfix           ///
         avg_HHinc_byr_bys_CWfix             ///
         avg_HHinc_1940_byrace_bysouth       ///
         avgincwage_1940_byrace_bysouth

* Alternative weight (1940): merged without a match requirement; the listed
* variables are dropped right after the merge
    merge 1:1 fatheroccej race south_merge ///
        using avgincomes_fathers1940_byrace_bysouth_altwgt_CWfix.dta
    drop _merge imputed_occ_CW avg_incwage_byr_bys_altwgt_CWfix avg_HHinc_byr_bys_altwgt_CWfix

* 1950-1990: every cell must be matched in both files
    forvalues yr = 1950(10)1990 {
        merge 1:1 fatheroccej race south_merge ///
            using incomescores_fathers`yr'_byocc_byr_bys.dta
        assert _merge==3
        drop _merge number_`yr'obs_byocc_byr_bys
        * The avg_faminc variable for the year is dropped, except in 1950
        if `yr' != 1950 {
            drop avg_faminc_`yr'_byocc_byr_bys
        }
    }

* 2000, 2010, 2019: unmatched rows are accepted only for occupation code 99
    foreach yr in 2000 2010 2019 {
        merge 1:1 fatheroccej race south_merge ///
            using incomescores_fathers`yr'_byrace_bysouth.dta
        assert fatheroccej==99 if _merge!=3
        drop number_`yr'obs_byocc_byr_bys _merge
    }

* Save
    save IncomeScores_Coarsened_byrace_bysouth.dta, replace

    ***VERSION FOR ADULT CHILD SURVEY RESPONDENTS***
    * Keep the cell keys plus one score per year
    keep fatheroccej race south_merge        ///
         avg_HHinc_byr_bys_CWfix             ///
         avg_inctot_1950_byocc_byr_bys       ///
         avg_HHinc_1940_byrace_bysouth       ///
         avg_HHinc_1960_byocc_byr_bys        ///
         avg_HHinc_1970_byocc_byr_bys        ///
         avg_HHinc_1980_byocc_byr_bys        ///
         avg_HHinc_1990_byocc_byr_bys        ///
         avg_HHinc_2000_byocc_byr_bys        ///
         avg_HHinc_2010_byocc_byr_bys        ///
         avg_HHinc_2019_byocc_byr_bys

    * Tabulate the occupation codes before and after removing code 99
    tabulate fatheroccej, missing
    drop if fatheroccej==99
    tabulate fatheroccej, missing

    * Add the _R suffix to every score and label it as a respondent score
    foreach score of varlist avg_HHinc_* avg_inc* {
        rename `score' `score'_R
        label variable `score'_R "Respondent income score using `score' by race and South"
    }

    * Re-key the file on the respondent-side names
    rename (south_merge fatheroccej) (south_merge_son occRej)

    save IncomeScores_Coarsened_byrace_bysouth_R.dta, replace
    
*-----------------------------------------*
*** 3. OCCUPATION X RACE x SOUTH x EDU 
*-----------------------------------------*
    /*There are no 1950 income scores
      at this level. */
* One row per fatheroccej x race x south_merge x edu cell: the 1940 file
* with the 1960-1990 files merged in.

*1940: starting (master) file, minus the listed variables
    use avgincomes_fathers1940_byrace_bysouth_byedu_CWfix.dta, clear
    drop imputed_occ_CW avgincwage_1940_byr_south_edu avg_HHinc_1940_byr_south_edu avg_incwage_byr_bys_edu_CWfix

* 1960-1990: one using file per census year
    forvalues yr = 1960(10)1990 {

        merge 1:1 fatheroccej race south_merge edu using incomescores_fathers`yr'_byors_byedu.dta
        * Every row must be matched in both files. Because this check already
        * rules out master-only rows, no separate check on _merge==1 rows is
        * needed (it could never fail).
        assert _merge==3
        drop _merge
        * Trim to the keys, the avg_HHinc_ variables, and the flags
        keep fatheroccej race south_merge edu avg_HHinc_* flag*
    }

    save IncomeScores_Coarsened_1940_byrace_bysouth_byedu.dta, replace
        
*------------------------------------------*
*** 4. OCCUPATION x RACE
*------------------------------------------*
* One row per fatheroccej x race cell: the 1940 averages with the 1950-1990
* score files merged in.

* 1940: starting (master) file
    use avgincomes_fathers1940_byrace_CWfix.dta, clear
    keep fatheroccej race                    ///
         avg_HHinc_byrace_CWfix              ///
         avg_HHinc_1940_byrace

* 1950-1990: one using file per census year
    forvalues yr = 1950(10)1990 {
        merge 1:1 fatheroccej race using incomescores_fathers`yr'_byocc_byr.dta
        * Master-only rows are accepted only when a key is missing or the
        * occupation code is 99; using-only rows are not checked here
        assert fatheroccej==. | race==. | fatheroccej==99 if _merge==1
        drop _merge number_`yr'obs_byocc_byr
        * The avg_inctot and avg_faminc variables for the year are dropped,
        * except in 1950
        if `yr' != 1950 {
            drop avg_inctot_`yr'_byocc_byr avg_faminc_`yr'_byocc_byr
        }
    }

    save IncomeScores_Coarsened_byrace.dta, replace
    
*-------------------------------------------*
*** 5. OCCUPATION x RACE x REGION
*-------------------------------------------*
    /*There are no 1950 income scores
      at this level. */
* One row per fatheroccej x race x region_merge cell: the 1940 file with the
* 1960-1990 files merged in.

* 1940: starting (master) file
    use avgincomes_fathers1940_byrace_byregion_CWfix.dta, clear
    keep fatheroccej race region_merge avg_HHinc_byr_byreg_CWfix

* 1960-1990: every cell must be matched in both files
    forvalues yr = 1960(10)1990 {
        merge 1:1 fatheroccej race region_merge ///
            using incomescores_fathers`yr'_byocc_byr_byreg.dta
        assert _merge==3
        drop _merge
        * Trim to the keys, the avg_HHinc_ variables, and the flags
        keep fatheroccej race region_merge avg_HHinc_* flag*
    }

    save IncomeScores_Coarsened_byrace_byregion.dta, replace
    
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*    

***************************************
*** MOTHERS
***************************************

*---------------------------------------*
*** 6. OCCUPATION x RACE x SOUTH 
*---------------------------------------*        
* One row per motheroccej x race x south_merge cell: the 1940 file with the
* 1950-1990 files merged in, then renamed with mother-specific names.

* 1940: starting (master) file
    use avgincomes_mothers1940_byrace_bysouth_CWfix.dta, clear
    keep motheroccej race south_merge mom_HHinc_byr_bys_CWfix

* 1950: merged without any requirement on the match result
    merge 1:1 motheroccej race south_merge ///
        using incomescores_mothers1950_byocc_byr_bys.dta
    * NOTE(review): the father version of this step drops a count variable
    * whose name ends in _byocc_byr_bys; confirm that the name below is the
    * intended one and is not being resolved by variable abbreviation
    drop _merge number_1950obs_byocc_byr

* 1960-1990: one using file per census year
    forvalues yr = 1960(10)1990 {

        merge 1:1 motheroccej race south_merge ///
            using incomescores_mothers`yr'_byocc_byr_bys.dta
        * Master-only rows are expected to have a missing key. Under
        * capture noisily a failure is displayed but does not stop the run
        capture noisily: assert motheroccej==. | race==. | south_merge==. if _merge==1
        drop _merge
        keep motheroccej race south_merge    ///
             mom_HHinc_byr_bys_CWfix         ///
             avg_inctot_1950_byocc_byr_bys   ///
             avg_HHinc_*
    }

    * Shorten the _byocc_byr_bys suffix, then prefix every avg_ variable
    * with mom_
    rename *_byocc_byr_bys *_byocc_rs
    rename avg_* mom_avg_*

    save IncomeScores_Coarsened_byrace_bysouth_moms.dta, replace
